/* XXL: The eXtensible and fleXible Library for data processing

Copyright (C) 2000-2011 Prof. Dr. Bernhard Seeger
                        Head of the Database Research Group
                        Department of Mathematics and Computer Science
                        University of Marburg
                        Germany

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library;  If not, see <http://www.gnu.org/licenses/>. 

    http://code.google.com/p/xxl/

*/

package xxl.core.cursors.joins;

import java.util.Comparator;
import java.util.Iterator;

import xxl.core.collections.sweepAreas.SortMergeEquiJoinSA;
import xxl.core.collections.sweepAreas.SweepArea;
import xxl.core.functions.Function;

/**
 * A sort-merge implementation of the equivalence-join operator (especially the
 * equi-join). This class provides a generic, untyped sort-merge join
 * algorithm. The join predicate has to be a equivalence-relation and the
 * elements of both input iterations have to be sortable according to an order
 * which provides the members of the equivalence-classes in sequence. In
 * contrast to the upper class {@link xxl.core.cursors.joins.SortMergeJoin},
 * the sort-merge equivalence-join provides support for left-outer, right-outer
 * and full outer joins. The resulting tuples of any join operation are
 * generated by a user defined function realizing a kind of factory method. The
 * binary function <code>newResult</code> can be used to map the result tuples
 * to an arbitrary user defined type. The sweep-line status structure, here
 * called sweep-area, consists of a bag with an additional method for
 * reorganisation. The way the elements of the input iterations are inserted
 * into the according sweep-area is determined by a given comparator. Depending
 * on the result of the comparison of the two next input elements of the input
 * iterations, the left (<code>sortedInput0</code>) or right
 * (<code>sortedInput1</code>) input is processed. If the left input is
 * processed, the sweep-area's of <code>sortedInput0</code> and
 * <code>sortedInput1</code> are reorganized and the next element of
 * <code>sortedInput0</code> is inserted in <code>sweepArea0</code>. After that
 * <code>sweepArea1</code> is queried with this element, i.e., the specified
 * predicate is applied on the elements contained in <code>sweepArea1</code>
 * and the last inserted element of <code>sweepArea0</code>. In order to
 * perform an effective search in the sweep-area, the query method of the bags
 * should be overriden. If the binary predicate returns <code>true</code> the
 * evaluated tuple gets an result of the join operation. After that query the
 * mapping function to create the result-tuples is applied on the results
 * detected by the predicate and after that they are returned to the user. The
 * right input is processed analogous. The implementation is a bit more complex
 * due to additional checks of join-types and the generation of result-tuples
 * where the evaluated join predicate returned <code>false</code>.
 * 
 * <p><b>Note:</b> When the given input iteration only implements the interface
 * {@link Iterator} it is wrapped to a cursor by a call to the static method
 * {@link xxl.core.cursors.Cursors#wrap(Iterator) wrap}.</p>
 * 
 * <p><b>Example usage (1):</b>
 * <code><pre>
 *   SortMergeEquivalenceJoin&lt;Integer, Object[]&gt; join = new SortMergeEquivalenceJoin&lt;Integer, Object[]&gt;(
 *       Arrays.asList(0, 1, 3, 4, 5, 7, 8, 9).iterator(),
 *       Arrays.asList(0, 2, 4, 6, 8, 10).iterator(),
 *       new SortMergeEquiJoinSA&lt;Integer&gt;(
 *           new ListSAImplementor&lt;Integer&gt;(),
 *           0,
 *           2
 *       ),
 *       new SortMergeEquiJoinSA&lt;Integer&gt;(
 *           new ListSAImplementor&lt;Integer&gt;(),
 *           1,
 *           2
 *       ),
 *       ComparableComparator.INTEGER_COMPARATOR,
 *       Tuplify.DEFAULT_INSTANCE,
 *       Type.OUTER_JOIN
 *   );
 *   
 *   join.open();
 *   
 *   while (join.hasNext())
 *       System.out.println(java.util.Arrays.toString(join.next()));
 *   
 *   join.close();
 * </pre></code>
 * The input iterations of this simple example are two list iterators. The
 * first one is based on all odd numbers and numbers that can be divided by 4
 * of the interval [0, 10]. The second input iterator contains all even numbers
 * of the same interval. The join predicate, an
 * {@link xxl.core.predicates.Equal equalality} predicate, is encapsulated in
 * the given sweep-areas based on a {@link java.util.List list}. In order to
 * compare an element of <code>sortedInput0</code> and an element of
 * <code>sortedInput1</code>, a default
 * {@link xxl.core.comparators.ComparableComparator#INTEGER_COMPARATOR comparator}
 * for integers is used. In this example no specific function is specified to
 * create user defined result-tuples. The
 * {@link xxl.core.functions.Tuplify#DEFAULT_INSTANCE identity} function delivers the
 * result-tuples in their original representation, namely as an array
 * containing the matching elements of the input iterations. This can simply be
 * seen when printing the result-tuples to the output stream. But in the
 * package {@link xxl.core.relational} there are different factory methods
 * creating a particular kind of tuple, e.g.,
 * {@link xxl.core.relational.tuples.ArrayTuple array-tuple},
 * {@link xxl.core.relational.tuples.ListTuple list-tuple}, that substitute the
 * tuplify function. So, let us consider the output of this join operation,
 * which looks as follows:
 * <pre>
 *   [0, 0]
 *   [1, null]
 *   [null, 2]
 *   [3, null]
 *   [4, 4]
 *   [5, null]
 *   [null, 6]
 *   [7, null]
 *   [8, 8]
 *   [9, null]
 *   [null, 10]
 * </pre></p>
 * 
 * @param <I> the type of the elements consumed by this iteration.
 * @param <E> the type of the elements returned by this join operation.
 * @see java.util.Iterator
 * @see xxl.core.cursors.Cursor
 * @see xxl.core.cursors.joins.NestedLoopsJoin
 * @see xxl.core.cursors.joins.SortMergeJoin
 */
public class SortMergeEquivalenceJoin<I, E> extends SortMergeJoin<I, E> {

	/**
	 * Creates a new sort-merge equivalence-join operator backed on two sorted
	 * input iterations using the given sweep-areas to store the input
	 * iterations' elements and probe for join results. Furthermore a function
	 * named <code>newResult</code> can be specified that is invoked on each
	 * qualifying tuple before it is returned to the caller concerning a call
	 * to the <code>next</code> method. This function is a kind of factory
	 * method to model the resulting object.
	 * 
	 * <p><b>Precondition:</b> The input cursors have to be sorted and the join
	 * predicate has to be an equivalence-relation!</p>
	 * 
	 * <p>The result-tuples created by this operator are represented as an
	 * array of the input iterations' elements that are participated in this
	 * join result. If the user wants to specify a different result-type, a
	 * mapping function <code>newResult</code> can be specified. This function
	 * works like a kind of factory method modelling the resulting object
	 * (tuple). Further a set of comparators have to be specified, in order to
	 * compare the elements of different input iterations. Every iterator given
	 * to this constructor is wrapped to a cursor.<p>
	 *
	 * @param sortedInput0 the first sorted input iteration to be joined.
	 * @param sortedInput1 the second sorted input iteration to be joined.
	 * @param sweepArea0 the sweep-area used for storing elements of the first
	 *        sorted input iteration (<code>sortedInput0</code>).
	 * @param sweepArea1 the sweep-area used for storing elements of the second
	 *        sorted input iteration (<code>sortedInput1</code>).
	 * @param comparator the comparator that is used for comparing elements of
	 *        the two sorted input iterations.
	 * @param newResult a factory method (function) that takes two parameters
	 *        as argument and is invoked on each tuple where the predicate's
	 *        evaluation result is <code>true</code>, i.e., on each qualifying
	 *        tuple before it is returned to the caller concerning a call to
	 *        the <code>next</code> method.
	 * @param type the type of this join; use one of the enumeration's element
	 *        defined in superclass.
	 * @throws IllegalArgumentException if the specified type is not valid.
	 */
	public SortMergeEquivalenceJoin(Iterator<? extends I> sortedInput0, Iterator<? extends I> sortedInput1, SortMergeEquiJoinSA<I> sweepArea0, SortMergeEquiJoinSA<I> sweepArea1, Comparator<? super I> comparator, Function<? super I, ? extends E> newResult, Type type) throws IllegalArgumentException {
		super(sortedInput0, sortedInput1, sweepArea0, sweepArea1, comparator, newResult);
		this.type = type;
	}

	/**
	 * Creates a new sort-merge equivalence-join operator backed on two input
	 * iterations using the given sweep-areas to store the input iterations'
	 * elements and probe for join results. The constructor does not require
	 * the two input iterations to be sorted. The two specified, unary
	 * functions <code>newSorter0</code> and <code>newSorter1</code> will be
	 * invoked on the corresponding input iteration in order to get a sorted
	 * input. Furthermore a function named <code>newResult</code> can be
	 * specified that is invoked on each qualifying tuple before it is returned
	 * to the caller concerning a call to the <code>next</code> method. This
	 * function is a kind of factory method to model the resulting object.
	 * 
	 * <p><b>Precondition:</b> The join predicate has to be an
	 * equivalence-relation!</p>
	 * 
	 * <p>The result-tuples created by this operator are represented as an
	 * array of the input iterations' elements that are participated in this
	 * join result. If the user wants to specify a different result-type, a
	 * mapping function <code>newResult</code> can be specified. This function
	 * works like a kind of factory method modelling the resulting object
	 * (tuple). Further a set of comparators have to be specified, in order to
	 * compare the elements of different input iterations. Every iterator given
	 * to this constructor is wrapped to a cursor.<p>
	 *
	 * @param input0 the first input iteration to be joined.
	 * @param input1 the second input iteration to be joined.
	 * @param newSorter0 an unary function that sorts the first input iteration
	 *        <code>input0</code>.
	 * @param newSorter1 an unary function that sorts the second input
	 *        iteration <code>input1</code>.
	 * @param sweepArea0 the sweep-area used for storing elements of the first
	 *        input iteration (<code>input0</code>).
	 * @param sweepArea1 the sweep-area used for storing elements of the second
	 *        input iteration (<code>input1</code>).
	 * @param comparator the comparator that is used for comparing elements of
	 *        the two input iterations.
	 * @param newResult a factory method (function) that takes two parameters
	 *        as argument and is invoked on each tuple where the predicate's
	 *        evaluation result is <code>true</code>, i.e., on each qualifying
	 *        tuple before it is returned to the caller concerning a call to
	 *        the <code>next</code> method.
	 * @param type the type of this join; use one of the enumeration's element
	 *        defined in superclass.
	 * @throws IllegalArgumentException if the specified type is not valid.
	 */
	public SortMergeEquivalenceJoin(Iterator<? extends I> input0, Iterator<? extends I> input1, Function<? super Iterator<? extends I>, ? extends Iterator<? extends I>> newSorter0, Function<? super Iterator<? extends I>, ? extends Iterator<? extends I>> newSorter1, SortMergeEquiJoinSA<I> sweepArea0, SortMergeEquiJoinSA<I> sweepArea1, Comparator<? super I> comparator, Function<? super I, ? extends E> newResult, Type type) throws IllegalArgumentException {
		super(input0, input1, newSorter0, newSorter1, sweepArea0, sweepArea1, comparator, newResult);
		this.type = type;
	}

	/**
	 * Creates a new sort-merge equivalence-join operator backed on two sorted
	 * input iterations using a parameterless function to create the required 
	 * sweep-areas that are used to store the input iterations' elements and
	 * probe for join results. Furthermore a function named
	 * <code>newResult</code> can be specified that is invoked on each
	 * qualifying tuple before it is returned to the caller concerning a call
	 * to the <code>next</code> method. This function is a kind of factory
	 * method to model the resulting object.
	 * 
	 * <p><b>Precondition:</b> The input cursors have to be sorted and the join
	 * predicate has to be an equivalence-relation!</p>
	 * 
	 * <p>The result-tuples created by this operator are represented as an
	 * array of the input iterations' elements that are participated in this
	 * join result. If the user wants to specify a different result-type, a
	 * mapping function <code>newResult</code> can be specified. This function
	 * works like a kind of factory method modelling the resulting object
	 * (tuple). Further a set of comparators have to be specified, in order to
	 * compare the elements of different input iterations. Every iterator given
	 * to this constructor is wrapped to a cursor.<p>
	 *
	 * @param sortedInput0 the first sorted input iteration to be joined.
	 * @param sortedInput1 the second sorted input iteration to be joined.
	 * @param newSweepArea a parameterless function creating a new sweep-area
	 *        that is used for storing elements of the sorted input iterations.
	 * @param comparator the comparator that is used for comparing elements of
	 *        the two sorted input iterations.
	 * @param newResult a factory method (function) that takes two parameters
	 *        as argument and is invoked on each tuple where the predicate's
	 *        evaluation result is <code>true</code>, i.e., on each qualifying
	 *        tuple before it is returned to the caller concerning a call to
	 *        the <code>next</code> method.
	 * @param type the type of this join; use one of the enumeration's element
	 *        defined in superclass.
	 * @throws IllegalArgumentException if the specified type is not valid.
	 */
	public SortMergeEquivalenceJoin(Iterator<? extends I> sortedInput0, Iterator<? extends I> sortedInput1, Function<?, ? extends SweepArea<I,I>> newSweepArea, Comparator<? super I> comparator, Function<? super I, ? extends E> newResult, Type type) throws IllegalArgumentException {
		super(sortedInput0, sortedInput1, newSweepArea, comparator, newResult);
		this.type = type;
	}

}
